In [1]:
#Warning ignorance if generated

import warnings
warnings.filterwarnings("ignore")
In [2]:
#import necessary python packages for single-cell RNA SEQ analysis

import scanpy as sc #software suite of tools for single-cell analysis in python
import besca as bc #internal BEDA package for single cell analysis
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import scipy
import anndata as ad
from scipy.sparse import csr_matrix
import scanpy.external as sce
from harmony import harmonize
import umap.umap_ as umap
from scipy import io
print(ad.__version__)

sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)

# gives error!! sc.logging.print_versions()
INFO:torch.distributed.nn.jit.instantiator:Created a temporary directory at /tmp/tmp2ghhn8aa
INFO:torch.distributed.nn.jit.instantiator:Writing /tmp/tmp2ghhn8aa/_remote_module_non_scriptable.py
INFO:lightning_fabric.utilities.seed:Global seed set to 0
0.9.1
In [3]:
#Reading last saved annoatated data object written in h5ad data format. 
#We used similar adata variable to make similar previous data analysis 

save_file = '/home/jana/scanpy_qc_filtered_pbmcs_for_sarcoid.h5ad'
adata=sc.read_h5ad(save_file)
In [4]:
#Display last saved adata object

print(adata)
AnnData object with n_obs × n_vars = 67346 × 26113
    obs: 'type', 'sample', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'percent_mt2', 'n_counts', 'n_genes', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'leiden', 'leiden_1.0', 'leiden_0.7', 'leiden_0.8', 'initial_annotation'
    var: 'gene_ids', 'feature_types', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'dendrogram_leiden_0.7', 'doublet_info_colors', 'hvg', 'initial_annotation_colors', 'leiden', 'leiden_0.7_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'leiden_colors', 'log1p', 'neighbors', 'pca', 'rank_genes_groups', 'sample_colors', 'umap'
    obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
    obsp: 'connectivities', 'distances'
In [5]:
#metadata of saved adata object
adata.obs
Out[5]:
type sample batch n_genes_by_counts total_counts total_counts_mt pct_counts_mt total_counts_ribo pct_counts_ribo total_counts_hb ... n_counts n_genes doublet_scores predicted_doublets doublet_info leiden leiden_1.0 leiden_0.7 leiden_0.8 initial_annotation
AAACCCAAGACATAAC-1-0 Sarc Sarc-1 0 385 585.0 27.0 4.615385 32.0 5.470086 1.0 ... 585.0 385 0.027344 False False 29 22 5 19 CD14+ Mono
AAACCCAAGAGGCGGA-1-0 Sarc Sarc-1 0 2191 5556.0 423.0 7.613391 613.0 11.033117 2.0 ... 5556.0 2191 0.064067 False False 10 7 6 7 CD14+ Mono
AAACCCAAGCGTACAG-1-0 Sarc Sarc-1 0 936 2864.0 253.0 8.833798 1131.0 39.490223 0.0 ... 2864.0 936 0.021978 False False 5 5 2 2 B cell
AAACCCAAGGTACAAT-1-0 Sarc Sarc-1 0 3622 11581.0 736.0 6.355237 1679.0 14.497885 2.0 ... 11581.0 3622 0.089888 False False 8 3 0 3 CD14+ Mono
AAACCCACAGCGTACC-1-0 Sarc Sarc-1 0 2219 6849.0 536.0 7.825960 1114.0 16.265148 0.0 ... 6849.0 2219 0.016505 False False 13 10 9 10 CD16 Mono
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
TTTGTTGGTTCAAGGG-1-5 healthy healthy-3 5 1541 5318.0 271.0 5.095901 2216.0 41.669800 1.0 ... 5318.0 1541 0.036342 False False 11 0 1 1 Naive CD4T
TTTGTTGTCACCTGGG-1-5 healthy healthy-3 5 1880 5073.0 345.0 6.800710 1458.0 28.740391 0.0 ... 5073.0 1880 0.073171 False False 15 6 7 6 Mixed
TTTGTTGTCATTGAGC-1-5 healthy healthy-3 5 1696 6433.0 331.0 5.145344 2995.0 46.556816 0.0 ... 6433.0 1696 0.064109 False False 4 0 1 1 Naive CD4T
TTTGTTGTCCGATGTA-1-5 healthy healthy-3 5 3787 12527.0 777.0 6.202602 1858.0 14.831964 0.0 ... 12527.0 3787 0.074398 False False 8 3 0 3 CD14+ Mono
TTTGTTGTCGTGGCTG-1-5 healthy healthy-3 5 1639 4419.0 373.0 8.440824 950.0 21.498077 0.0 ... 4419.0 1639 0.012614 False False 7 6 7 6 Mixed

67346 rows × 22 columns

In [6]:
# Displaying Leiden clustering with chosen of 0.7 resolution 

sc.pl.umap(adata, color="leiden_0.7",  use_raw=False, legend_loc="on data")
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning:

The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.

/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored

In [7]:
# Reclustering cluster 2 Temporary annotation B-cell
#Isolate cluster 2 from main leiden clusters named as cluster2_adata

cluster2_adata = adata[adata.obs['leiden_0.7'].isin(['2' ]),:]
In [8]:
#Looking the annotated cluster2_adata

print (cluster2_adata)
View of AnnData object with n_obs × n_vars = 7217 × 26113
    obs: 'type', 'sample', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'percent_mt2', 'n_counts', 'n_genes', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'leiden', 'leiden_1.0', 'leiden_0.7', 'leiden_0.8', 'initial_annotation'
    var: 'gene_ids', 'feature_types', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'dendrogram_leiden_0.7', 'doublet_info_colors', 'hvg', 'initial_annotation_colors', 'leiden', 'leiden_0.7_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'log1p', 'neighbors', 'pca', 'rank_genes_groups', 'sample_colors', 'umap'
    obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
    obsp: 'connectivities', 'distances'
In [9]:
#Computing with a series of resolution parameters and silhouette_scores. 

#Like various algorithms, Leiden has also a parameter named the resolution. 
#It can control the coarseness of the clustering. 
#Higher values of resolution mean it leads to more clusters.

#Computing Silhouette Coefficient or Silhouette Score, a metric that was used to calculate the goodness of a clustering. 
# -1 <= silhouette score<= 1.

from sklearn.metrics import silhouette_score

# Define a list of resolution parameters
#resolutions = [round(r, 2) for r in [.05] + list(np.linspace(.1, 1.6, 16))]
resolutions=[0.1,0.2,0.3, 0.4,0.5, 0.6, 0.7,0.8, 1.0, 1.2]
# Print a message indicating the start of the computation
print("Computing silhouette scores with different resolution parameters")

# Iterate over each resolution parameter and compute the silhouette score
for resolution in resolutions:
    # Apply the Leiden clustering algorithm with the current resolution parameter
    sc.tl.leiden(cluster2_adata, resolution=resolution)
    
    # Compute the silhouette score for the clustering result
    silhouette2 = silhouette_score(cluster2_adata.obsm['X_umap'], cluster2_adata.obs[f'leiden'])
    
    # Print the silhouette score for the current resolution parameter
    print(f"Silhouette score for resolution {resolution}: {silhouette2}")
Computing silhouette scores with different resolution parameters
running Leiden clustering
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/tools/_leiden.py:158: ImplicitModificationWarning:

Trying to modify attribute `.obs` of view, initializing view as actual.

    finished: found 2 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:01)
Silhouette score for resolution 0.1: 0.3733758330345154
running Leiden clustering
    finished: found 3 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
Silhouette score for resolution 0.2: 0.4038565754890442
running Leiden clustering
    finished: found 4 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
Silhouette score for resolution 0.3: 0.2567930221557617
running Leiden clustering
    finished: found 6 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
Silhouette score for resolution 0.4: 0.17308494448661804
running Leiden clustering
    finished: found 8 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
Silhouette score for resolution 0.5: 0.07684057950973511
running Leiden clustering
    finished: found 9 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
Silhouette score for resolution 0.6: 0.1091974675655365
running Leiden clustering
    finished: found 9 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
Silhouette score for resolution 0.7: 0.11528532952070236
running Leiden clustering
    finished: found 10 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:01)
Silhouette score for resolution 0.8: 0.11307550221681595
running Leiden clustering
    finished: found 14 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:01)
Silhouette score for resolution 1.0: 0.04247363656759262
running Leiden clustering
    finished: found 17 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
Silhouette score for resolution 1.2: 0.0004957361961714923
In [12]:
#ReClustering of Cluster 2 neighborhood graph using Leiden Clustering algorithm with Resolution of 0.1

sc.pp.neighbors(cluster2_adata, n_neighbors=10, n_pcs=50)
sc.tl.leiden(cluster2_adata, resolution=0.1)
sc.tl.umap(cluster2_adata)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:02)
running Leiden clustering
    finished: found 2 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
computing UMAP
    finished: added
    'X_umap', UMAP coordinates (adata.obsm) (0:00:24)
In [13]:
#Cluster 2 Some geneexpresion of all B-cell markers genes in UMAP space
sc.pl.umap(cluster2_adata, color = ['leiden','CD27','CD24','BCL6','CD40','CD38','CD74','AIM2','TCL1A', 'RPL18A','PRDM1'], wspace = 0.2,  legend_loc="on data")
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning:

The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.

/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored

In [14]:
# After Violin plot seeking the expression changes different markers genes, 
#trying to make a temporary table for manual annotations

from tabulate import tabulate

bcell_table = [
         ['Cluster no', 'Type of Cell'], 
         ['0', 'B-Naive'], 
         ['1', 'B Memory'], 
         
        ]

print(tabulate(bcell_table))
----------  ------------
Cluster no  Type of Cell
0           B-Naive
1           B Memory
----------  ------------
In [15]:
#B-cell markers provide by AZIMUTH tool

B_intermediate = ['MS4A1', 'TNFRSF13B', 'IGHM', 'IGHD', 'AIM2', 'CD79A', 'LINC01857', 'RALGPS2', 'BANK1', 'CD79B']

B_memory = ['MS4A1', 'COCH', 'AIM2', 'BANK1', 'SSPN', 'CD79A', 'TEX9', 'RALGPS2', 'TNFRSF13C', 'LINC01781']

B_naive	= ['IGHM', 'IGHD', 'CD79A', 'IL4R', 'MS4A1', 'CXCR4', 'BTG1', 'TCL1A', 'CD79B', 'YBX3']
In [16]:
#Dotplot

sc.pl.dotplot(adata, B_memory, groupby='leiden_0.7', dendrogram=True)
print ("--------")
#Dotplot

sc.pl.dotplot(cluster2_adata, B_memory, groupby='leiden', dendrogram=True)
print ("--------")
#Dotplot

sc.pl.dotplot(cluster2_adata, B_naive, groupby='leiden', dendrogram=True)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_dotplot.py:749: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored

--------
WARNING: Dendrogram not added. Dendrogram is added only when the number of categories to plot > 2
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_dotplot.py:749: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored

--------
WARNING: Dendrogram not added. Dendrogram is added only when the number of categories to plot > 2
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_dotplot.py:749: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored

In [17]:
#cluster 2 reclustered annotations

new_2_cluster_names = ['B_naive', #0
                     'B_memory', #1
                     ] 

bc.tl.annotate_cells_clustering(adata=cluster2_adata, clustering_label='leiden', new_annotation_label='initial_annotation_2', new_cluster_labels=new_2_cluster_names)
In [18]:
#cluster 2 reclustered annotations UMAP

sc.pl.umap(cluster2_adata, color = ['leiden','initial_annotation_2'], legend_loc="on data")
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning:

The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.

/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored

/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored

In [19]:
# Reclustering cluster 17 Temporary annotation Mixed cells

#Isolate cluster 17 from main leiden clusters named as cluster2_adata

cluster17_adata = adata[adata.obs['leiden_0.7'].isin(['17' ]),:]
In [20]:
#ReClustering of Cluster 17 neighborhood graph using Leiden Clustering algorithm with Resolution of 0.1
sc.pp.neighbors(cluster17_adata, n_neighbors=10, n_pcs=50)
sc.tl.leiden(cluster17_adata, resolution=0.1)
sc.tl.umap(cluster17_adata)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
running Leiden clustering
    finished: found 4 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
computing UMAP
    finished: added
    'X_umap', UMAP coordinates (adata.obsm) (0:00:01)
In [21]:
#Finding Marker genes inside cluster 17 mixed

#Logarithmize the data

sc.pp.log1p(cluster17_adata)

#Finding marker genes using Wilcoxon rank-sum method

sc.tl.rank_genes_groups(cluster17_adata, 'leiden', method='wilcoxon')

#Showing Top 25 Genes scorer of each clusters

sc.pl.rank_genes_groups(cluster17_adata, n_genes=25, sharey=False)
WARNING: adata.X seems to be already log-transformed.
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:01)
In [22]:
#dnT markers provided by Azimuth and checked markers genes inside cluster 17 mixed

sc.pl.umap(cluster17_adata, color = ['leiden','PTPN3', 'MIR4422HG', 'NUCB2', 'CAV1', 'DTHD1', 'GZMA', 'MYB', 'FXYD2', 'GZMK', 'AC004585.1'], legend_loc="on data")
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning:

The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.

/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored

In [23]:
# Displaying a table of Top 5 highly differentialy genes inside cluster 17 mixed reculstered


pd.DataFrame(cluster17_adata.uns['rank_genes_groups']['names']).head(5)
Out[23]:
0 1 2 3
0 HMGB2 MZB1 NKG7 GZMK
1 HMGN2 UBE2J1 GZMB TMSB4X
2 HMGB1 TNFRSF17 CTSW AC004585.1
3 H2AFZ TXNDC5 GNLY GPR183
4 MKI67 JCHAIN PRF1 TCF7
In [24]:
#Nk_proliferation markers genes inside cluster 17 reclustered


sc.pl.umap(cluster17_adata, color = ['leiden','MKI67', 'KLRF1', 'TYMS', 'TRDC', 'TOP2A', 'FCER1G', 'PCLAF', 'CD247', 'CLSPN', 'ASPM'], legend_loc="on data")
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning:

The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.

/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored

In [25]:
#Plasmablast markers checking cluster 17 reclustered

#Plasmablast = ['IGHA2', 'MZB1', 'TNFRSF17', 'DERL3', 'TXNDC5', 'TNFRSF13B', 'POU2AF1', 'CPNE5', 'HNT5DC2']


sc.pl.umap(cluster17_adata, color = ['leiden','IGHA2', 'MZB1', 'TNFRSF17', 'DERL3', 'TXNDC5', 'TNFRSF13B', 'POU2AF1', 'CPNE5'], legend_loc="on data")
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning:

The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.

/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored

In [26]:
#Cluster 17 reclustered new annotation

new_17_cluster_names = ['NK_proli', #0
                     'Plasmablast', #1
                     'NK', #2
                     'dnT', #3
                     ] 

bc.tl.annotate_cells_clustering(adata=cluster17_adata, clustering_label='leiden', new_annotation_label='initial_annotation_17', new_cluster_labels=new_17_cluster_names)
In [27]:
#cluster 17 reclustering cell annotation

sc.pl.umap(cluster17_adata, color = ['leiden','initial_annotation_17'], legend_loc="on data")
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning:

The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.

/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored

/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored

In [28]:
# Reclustering cluster 7 Temporary mixed cells
#Isolate cluster 7 from main leiden clusters named as cluster7_adata

cluster7_adata = adata[adata.obs['leiden_0.7'].isin(['7' ]),:]
In [29]:
#ReClustering of Cluster 7 neighborhood graph using Leiden Clustering algorithm with Resolution of 0.3

sc.pp.neighbors(cluster7_adata, n_neighbors=10, n_pcs=50)
sc.tl.leiden(cluster7_adata, resolution=0.3)
sc.tl.umap(cluster7_adata)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:02)
running Leiden clustering
    finished: found 3 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
computing UMAP
    finished: added
    'X_umap', UMAP coordinates (adata.obsm) (0:00:17)
In [30]:
sc.pl.umap(cluster7_adata, color = ['leiden','IL32','IL7R','TRAC', 'TRDC','TRBC1','TRGC1','CD8B', 'S100B', 'CCR7', 'CD8A','CD4', 'GNLY', 'TYROBP', 'NKG7'], legend_loc="on data", wspace = 0.2)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning:

The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.

/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored

In [31]:
#Finding Marker genes

#Logarithmize the data

sc.pp.log1p(cluster7_adata)

#Finding marker genes using Wilcoxon rank-sum method

sc.tl.rank_genes_groups(cluster7_adata, 'leiden', method='wilcoxon')

#Showing Top 25 Genes scorer of each clusters

sc.pl.rank_genes_groups(cluster7_adata, n_genes=25, sharey=False)
WARNING: adata.X seems to be already log-transformed.
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:16)
In [32]:
# Displaying a table of Top 5 highly differentialy highly scorer expressed genesfor each clusters in Leiden clustering


pd.DataFrame(cluster7_adata.uns['rank_genes_groups']['names']).head(5)
Out[32]:
0 1 2
0 NKG7 IL7R S100A4
1 GNLY GZMK CD52
2 GZMB RPL13 B2M
3 KLRD1 LTB SH3BGRL3
4 GZMH EEF1A1 ITGB1
In [33]:
# Reclustering cluster 0 of cluster 7 Temporary mixed cells
#Isolate cluster0 from cluster 7 mixed main leiden clusters named as cluster7_0_adata

cluster7_0_adata = cluster7_adata[cluster7_adata.obs['leiden'].isin(['0']),:]
In [34]:
#cluster 0 of cluster 7 reclustering object
cluster7_0_adata
Out[34]:
View of AnnData object with n_obs × n_vars = 3214 × 26113
    obs: 'type', 'sample', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'percent_mt2', 'n_counts', 'n_genes', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'leiden', 'leiden_1.0', 'leiden_0.7', 'leiden_0.8', 'initial_annotation'
    var: 'gene_ids', 'feature_types', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'dendrogram_leiden_0.7', 'doublet_info_colors', 'hvg', 'initial_annotation_colors', 'leiden', 'leiden_0.7_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'log1p', 'neighbors', 'pca', 'rank_genes_groups', 'sample_colors', 'umap', 'leiden_colors'
    obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
    obsp: 'connectivities', 'distances'
In [35]:
#Cluster 0 isolated from Cluster 7 reclusters

sc.pl.umap(cluster7_0_adata, color = ['leiden','NKG7'], legend_loc="on data", wspace = 0.2)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning:

The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.

/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored

In [36]:
#ReClustering of Cluster 0 from cluster 7 neighborhood graph using Leiden Clustering algorithm with Resolution of 0.3

sc.pp.neighbors(cluster7_0_adata, n_neighbors=10, n_pcs=50)
sc.tl.leiden(cluster7_0_adata, resolution=0.4)
sc.tl.umap(cluster7_0_adata)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
running Leiden clustering
    finished: found 3 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
computing UMAP
    finished: added
    'X_umap', UMAP coordinates (adata.obsm) (0:00:11)
In [37]:
#Cluster 0 isolated from Cluster 7 reclusters

sc.pl.umap(cluster7_0_adata, color = ['leiden','NKG7'], legend_loc="on data", wspace = 0.2)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning:

The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.

/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored

In [38]:
#Finding Marker genes

#Logarithmize the data

sc.pp.log1p(cluster7_0_adata)

#Finding marker genes using Wilcoxon rank-sum method

sc.tl.rank_genes_groups(cluster7_0_adata, 'leiden', method='wilcoxon')

#Showing Top 25 Genes scorer of each clusters

sc.pl.rank_genes_groups(cluster7_0_adata, n_genes=25, sharey=False)
WARNING: adata.X seems to be already log-transformed.
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:09)
In [39]:
# Displaying a table of Top 5 highly differentialy highly scorer expressed genesfor each clusters in Leiden clustering


pd.DataFrame(cluster7_0_adata.uns['rank_genes_groups']['names']).head(5)
Out[39]:
0 1 2
0 GNLY CD8A TRGC1
1 TYROBP CD8B TRDC
2 KLRC3 TRBC2 KLRC1
3 KLRC2 TRAC KLRB1
4 KLRF1 THEMIS CD247
In [40]:
#Some markers genes exprssed inside cluster 0 of cluster 7 (cluster7_0)
sc.pl.umap(cluster7_0_adata, color = ['leiden','IL32','IL7R','TRAC', 'TRDC','TRBC1','TRGC1','CD8B', 'S100B', 'CCR7', 'CD8A','CD4', 'GNLY', 'TYROBP', 'NKG7'], legend_loc="on data", wspace = 0.2)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning:

The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.

/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored

In [ ]: